Load Data

load("./curves.RData")

Plotting Original Data

real_df = by_country %>% 
  ungroup(country_region) %>% 
  mutate(
    group = replicate(length(by_country$country_region), 0)
  ) %>% 
  group_by(country_region) %>% 
  mutate(
    log_cases = log10(confirmed_cases),
    max_log = max(log_cases)
  ) %>% 
  dplyr::select(-log_cases) %>% 
  arrange(max_log) 

for (i in 1:length(real_df$country_region)) {
  if (real_df$max_log[i] < 2) {
    real_df$group[i] = "0 - 99"
  } else if (real_df$max_log[i] < log10(500)) {
    real_df$group[i] = "100 - 499"
  } else if (real_df$max_log[i] < 3) {
    real_df$group[i] = "500 - 999"
  } else if (real_df$max_log[i] < log10(5000)) {
    real_df$group[i] = "1000 - 4999"
  } else if (real_df$max_log[i] < 4) {
    real_df$group[i] = "5000 - 9999"
  } else if (real_df$max_log[i] < 5) {
    real_df$group[i] = "10000 - 99999"
  } else {
    real_df$group[i] = "100000 +"
  }
}

real_df$group = factor(real_df$group, levels = c("0 - 99", "100 - 499", "500 - 999", "1000 - 4999", "5000 - 9999", "10000 - 99999", "100000 +"), ordered = TRUE)

real_df %>% 
  ggplot(aes(x = t, y = confirmed_cases)) +
  geom_path(aes(color = country_region)) +
  facet_grid(group ~ ., scales = "free") +
  theme(legend.position = "none") +
  geom_dl(aes(label = country_region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for All Countries (Real Data)",
    x = "Days Since First Case",
    y = "Cumulative Cases (Grouped by Total)"
  )

ggsave("./visualizations/real_data_plotted.jpg", width = 12, height = 8)

Plotting Estimates

Plotting Top 20 Countries with Greatest Population

fitted_list_2 = as.data.frame(unlist(fitted_list)) %>% 
  janitor::clean_names() %>% 
  rownames_to_column(var = "country") %>% 
  mutate(
    t = as.numeric(gsub("[^[:digit:]]", "", country)) - 1,
    region = gsub("[[:digit:]]","", country)) %>% 
  dplyr::select(region, t, cases = unlist_fitted_list)

large_pop_list = c("China", "India", "US", "Indonesia", "Pakistan", "Brazil", "Nigeria", "Bangladesh", "Russia", "Mexico", "Japan", "Ethiopia", "Phillipines", "Egypt", "Vietnam", "Congo (Kinshasa)", "Turkey", "Iran", "Germany", "Thailand")

top_population = fitted_list_2 %>%
  filter(region %in% large_pop_list) %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for Top 20 Countries in Population",
    x = "Days After First Case",
    y = "Cumulative Cases"
  )

ggsave("./visualizations/top_20_regions.jpg", plot = top_population, width = 12, height = 8)

Comparison of Real and Estimated Data

comparison_df = by_country %>% 
  rename(region = country_region) %>% 
  dplyr::select(-fatalities) %>% 
  left_join(fitted_list_2, by = c("region", "t")) %>% 
  rename(estimated_cases = cases) %>% 
  filter(region == "US" | region == "China" | region == "Italy" | region == "Iran") %>% 
  pivot_longer(confirmed_cases:estimated_cases, names_to = "data_type")

comparison_df %>% 
  ggplot(aes(x = t, y = value)) +
  geom_path(aes(color = data_type)) +
  facet_wrap(region ~ ., scale = "free", nrow = 2) +
  labs(
    title = "Real Data vs. Estimated Data from Algorithm for Select Countries",
    x = "Days Since First Case",
    y = "Number of Cumulative Cases"
  )

ggsave("./visualizations/comparison_graph.jpg", width = 12, height = 8)

Plotting Top 20 Countries with Most Cases

top_names = fitted_list_2 %>% 
  group_by(region) %>% 
  mutate(max_cases = max(cases)) %>% 
  dplyr::select(region, max_cases) %>% 
  distinct() %>% 
  arrange(desc(max_cases)) %>% 
  head(20) %>% 
  dplyr::select(region)

top_names = as.tibble(top_names)

top_cases = fitted_list_2 %>%
  filter(region %in% top_names$region) %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for Countries with Most Cases",
    x = "Days After First Case",
    y = "Cumulative Cases"
  )

ggsave("./visualizations/highest_cases.jpg", plot = top_cases, width = 12, height = 8)

Plotting Countries Where Total is Still Growing

growing_df = fitted_list_2 %>% 
  full_join(param_df1, by = "region") 
  
for (i in 1:length(growing_df$region)) {
    if (growing_df$b[i] < 0.08) {
    growing_df$group[i] = "Growing Very Slowly"
  } else if (growing_df$b[i] < 0.15) {
    growing_df$group[i] = "Growing Slowly"
  } else if (growing_df$b[i] < 0.22) {
    growing_df$group[i] = "Growing Moderately"
  } else if (growing_df$b[i] < 0.32) {
    growing_df$group[i] = "Growing Quickly"
  } else {
    growing_df$group[i] = "Growing Very Quickly"
  }
}

growing_df$group = factor(growing_df$group, levels = c("Growing Very Slowly", "Growing Slowly", "Growing Moderately", "Growing Quickly", "Growing Very Quickly"), ordered = TRUE)

growing_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  facet_grid(group ~ ., scales = "free") +
  theme(legend.position = "none") +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for All Countries",
    x = "Days Since First Case",
    y = "Cumulative Cases (Grouped by Growth Rate)"
  )

ggsave("./visualizations/growth_group_plot.jpg", width = 12, height = 8)

Plotting Countries by Continent

continent_df = 
  fitted_list_2 %>% 
    mutate(
      continent = countrycode(sourcevar = fitted_list_2[, "region"],
                              origin = "country.name",
                              destination = "continent")
    )

continent_df$continent[4320:4345] = "Europe"

continent_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  facet_grid(continent ~ ., scales = "free") +
  theme(legend.position = "none") +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for All Countries",
    x = "Days Since First Case",
    y = "Cumulative Cases (Grouped by Continent)"
  )

ggsave("./visualizations/covid_19_continents.jpg", width = 12, height = 8)

Plotting Countries by Number of Cases

number_df = fitted_list_2 %>% 
  mutate(
    group = replicate(length(fitted_list_2$cases), 0)
  ) %>% 
  group_by(region) %>% 
  mutate(
    log_cases = log10(cases),
    max_log = max(log_cases)
  ) %>% 
  dplyr::select(-log_cases) %>% 
  arrange(max_log) 

for (i in 1:length(number_df$region)) {
  if (number_df$max_log[i] < 2) {
    number_df$group[i] = "0 - 99"
  } else if (number_df$max_log[i] < log10(500)) {
    number_df$group[i] = "100 - 499"
  } else if (number_df$max_log[i] < 3) {
    number_df$group[i] = "500 - 999"
  } else if (number_df$max_log[i] < log10(5000)) {
    number_df$group[i] = "1000 - 4999"
  } else if (number_df$max_log[i] < 4) {
    number_df$group[i] = "5000 - 9999"
  } else if (number_df$max_log[i] < 5) {
    number_df$group[i] = "10000 - 99999"
  } else {
    number_df$group[i] = "100000 +"
  }
}

number_df$group = factor(number_df$group, levels = c("0 - 99", "100 - 499", "500 - 999", "1000 - 4999", "5000 - 9999", "10000 - 99999", "100000 +"), ordered = TRUE)

number_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  facet_grid(group ~ ., scales = "free") +
  theme(legend.position = "none") +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases for All Countries",
    x = "Days Since First Case",
    y = "Cumulative Cases (Grouped by Total)"
  )

ggsave("./visualizations/all_covid_cases.jpg", width = 12, height = 8)

Differences in Stay at Home Orders

distancing_df = fitted_list_2 %>% 
  filter(region == "US" | region == "China" | region == "Italy" | region == "Sweden") %>% 
  mutate(population = rep(0, length(cases)))

for (i in 1:length(distancing_df$cases)) {
  if (distancing_df$region[i] == "US") {
    distancing_df$population[i] = 328200000
  } else if (distancing_df$region[i] == "China") {
    distancing_df$population[i] = 1393000000
  } else if (distancing_df$region[i] == "Italy" ) {
    distancing_df$population[i] = 60360000
  } else {distancing_df$population[i] = 10230000}
}

distancing_df = distancing_df %>% 
  mutate(
    percent_infected = (cases / population) * 100
  )

highlights = unlist(x = c(distancing_df[14, ], distancing_df[286, ], distancing_df[214, ]))

distancing_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  geom_point(data = distancing_df[14,], color = "#F8766D") +
  geom_point(data = distancing_df[286,], color = "#C77CFF") +
  geom_point(data = distancing_df[130,], color = "#7CAE00") +
  labs(
    title = "Estimated Curves for Select Countries and Stay At Home Implementation Date",
    x = "Days Since First Case (Black Dot Indicates Implementation of Social Distancing)",
    y = "Cumulative Cases"
  )

ggsave("./visualizations/social_distancing.jpg", width = 12, height = 8)

distancing_df %>% 
  ggplot(aes(x = t, y = percent_infected)) +
  geom_path(aes(color = region)) +
  geom_point(data = distancing_df[14,], color = "#F8766D") +
  geom_point(data = distancing_df[286,], color = "#C77CFF") +
  geom_point(data = distancing_df[130,], color = "#7CAE00") +
  labs(
    title = "Estimated Curves for Select Countries and Stay At Home Implementation Date",
    x = "Days Since First Case (Dot Indicates Implementation of Social Distancing)",
    y = "Cumulative Percentage Infected"
  )

ggsave("./visualizations/social_distancing_percentage.jpg", width = 12, height = 8)

Plotting Clustering Results

K means Clustering

kmeans_df = read_csv("./param_km3_final.csv") %>% 
  group_by(cluster) %>% 
  mutate(
    mean_a = round(mean(a), digits = 0),
    mean_b = round(mean(b), digits = 2),
    mean_c = round(mean(c), digits = 2)
  )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   a_std = col_double(),
##   b_std = col_double(),
##   c_std = col_double(),
##   region = col_character(),
##   a = col_double(),
##   b = col_double(),
##   c = col_double(),
##   cluster = col_double()
## )
kmeans_graphing_df = fitted_list_2 %>% 
  left_join(kmeans_df, by = "region") %>% 
  dplyr::select(region, t, cases, cluster)

labels = c(`1` = "Cluster 1: a = 198895, b = 0.26, c = 39.61", `2` = "Cluster 2: a = 24196, b = 0.08, c = 73.55", `3` = "Cluster 3: a = 7970, b = 0.13, c = 35.23")

kmeans_graphing_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  facet_grid(cluster ~ ., scales = "free", labeller = labeller(cluster = labels)) +
  theme(legend.position = "none") +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases Grouped by K-Means Clustering",
    x = "Days Since First Case",
    y = "Cumulative Cases Grouped by Cluster"
  )

ggsave("./visualizations/kmeans_graph.jpg", width = 12, height = 12)
kmeans_plot = kmeans_graphing_df %>% 
  ggplot(aes(x = t, y = cases, color = cluster, text = region)) +
  geom_path() +
  theme(legend.position = "none") +
  labs(
    title = "Cumulative COVID-19 Cases Grouped by K-Means Clustering",
    x = "Days Since First Case",
    y = "Cumulative Cases Grouped by Cluster"
  )

ggplotly(kmeans_plot, tooltip = "text")

Gaussian Clustering

gmm_df = read_csv("./param_gmm_final.csv") %>% 
  group_by(cluster) %>% 
  mutate(
    mean_a = round(mean(a), digits = 0),
    mean_b = round(mean(b), digits = 2),
    mean_c = round(mean(c), digits = 2)
  )
## Parsed with column specification:
## cols(
##   X1 = col_double(),
##   a_std = col_double(),
##   b_std = col_double(),
##   c_std = col_double(),
##   region = col_character(),
##   a = col_double(),
##   b = col_double(),
##   c = col_double(),
##   cluster = col_double()
## )
gmm_graphing_df = fitted_list_2 %>% 
  left_join(gmm_df, by = "region") %>% 
  dplyr::select(region, t, cases, cluster)

labels_g = c(`1` = "Cluster 1: a = 3983, b = 0.09, c = 55.17", `2` = "Cluster 2: a = 326117, b = 0.16, c = 65.44", `3` = "Cluster 3: a = 11998, b = 0.2, c = 36.34")


gmm_graphing_df %>% 
  ggplot(aes(x = t, y = cases)) +
  geom_path(aes(color = region)) +
  facet_grid(cluster ~ ., scales = "free", labeller = labeller(cluster = labels_g)) +
  theme(legend.position = "none") +
  geom_dl(aes(label = region), method = list(dl.combine("last.points"), cex = 0.7)) +
  labs(
    title = "Cumulative COVID-19 Cases Grouped by Gaussian Mixture Model Clustering",
    x = "Days Since First Case",
    y = "Cumulative Cases Grouped by Cluster"
  )

ggsave("./visualizations/gmm_graph.jpg", width = 12, height = 12)
gmm_plot = gmm_graphing_df %>% 
  ggplot(aes(x = t, y = cases, color = cluster, text = region)) +
  geom_path() +
  theme(legend.position = "none") +
  labs(
    title = "Cumulative COVID-19 Cases Grouped by Gaussian Mixture Model Clustering",
    x = "Days Since First Case",
    y = "Cumulative Cases Grouped by Cluster"
  )

ggplotly(gmm_plot, tooltip = "text")
income_group_df = read_csv("./country_code_gdp_indicators.csv") %>% 
  janitor::clean_names() %>% 
  rename("region" = table_name,
         "global_region" = region) %>% 
  mutate(region = recode(region, 
                         "Bahamas, The" = "Bahamas",
                         "Brunei Darussalam" = "Brunei",
                         "Côte d'Ivoire" = "Cote d'Ivoire",
                         "Congo, Dem. Rep." = "Congo (Kinshasa)",
                         "Congo, Rep." = "Congo (Brazzaville)",
                         "Czech Republic" = "Czechia",
                         "Egypt, Arab Rep." = "Egypt",
                         "Gambia, The" = "Gambia",
                         "Iran, Islamic Rep." = "Iran",
                         "Korea, Rep." = "Korea, South",
                         "Kyrgyz Republic" = "Kyrgyzstan",
                         "Lao PDR" = "Laos",
                         "Russian Federation" = "Russia",
                         "Myanmar" = "Burma",
                         "St. Kitts and Nevis" = "Saint Kitts and Nevis",
                         "St. Vincent and the Grenadines" = "Saint Vincent and the Grenadines",
                         "St. Lucia" = "Saint Lucia",
                         "São Tomé and Principe" = "Sao Tome and Principe",
                         "Slovak Republic" = "Slovakia",
                         "Syrian Arab Republic" = "Syria",
                         "United States" = "US",
                         "Venezuela, RB" = "Venezuela",
                         "Yemen, Rep." = "Yemen"))
## Parsed with column specification:
## cols(
##   `Country Code` = col_character(),
##   Region = col_character(),
##   IncomeGroup = col_character(),
##   SpecialNotes = col_character(),
##   TableName = col_character()
## )
global_bank_df = kmeans_graphing_df %>% 
  left_join(income_group_df, by = "region") %>% 
  filter(!is.na(income_group))

income_group_plot = global_bank_df %>% 
  ggplot(aes(x = t, y = cases, color = cluster, text = region)) +
  geom_path() +
  facet_grid(income_group ~ ., scales = "free", labeller = labeller(cluster = labels)) +
  theme(legend.position = "none") +
  labs(
    title = "Cumulative COVID-19 Cases Grouped by K-Means Clustering",
    x = "Days Since First Case",
    y = "Cumulative Cases Grouped by Cluster"
  )

ggplotly(income_group_plot, tooltip = "text")